from IPython.display import Image
from IPython.core.display import HTML
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from shapely import wkt, LineString
import matplotlib.pyplot as plt
import ipywidgets
import osmnx as ox
import networkx as nx
import re

def combine_taxi_dfs() -> pd.DataFrame:
dfs = []
for i in range(1,13):
df = pd.read_csv(f'taxi/Yellow_Taxi_Trip_Data_{i}_2018.csv', infer_datetime_format=True)
dfs.append(df.sample(50000))
return dfs
combined = pd.concat(combine_taxi_dfs()).sort_values(by='tpep_pickup_datetime')
combined['tripID'] = combined.index
combined
| VendorID | tpep_pickup_datetime | tpep_dropoff_datetime | passenger_count | trip_distance | RatecodeID | store_and_fwd_flag | PULocationID | DOLocationID | payment_type | fare_amount | extra | mta_tax | tip_amount | tolls_amount | improvement_surcharge | total_amount | tripID | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19061600 | 1 | 01/01/2018 01:00:10 AM | 01/01/2018 01:21:27 AM | 1 | 5.90 | 1 | N | 140 | 223 | 1 | 20.5 | 0.5 | 0.5 | 4.35 | 0.0 | 0.3 | 26.15 | 19061600 |
| 19061552 | 2 | 01/01/2018 01:00:20 AM | 01/01/2018 01:08:12 AM | 1 | 2.52 | 1 | N | 233 | 263 | 2 | 9.0 | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 10.30 | 19061552 |
| 9246627 | 2 | 01/01/2018 01:00:20 PM | 01/01/2018 01:11:21 PM | 1 | 2.15 | 1 | N | 13 | 79 | 1 | 10.0 | 0.0 | 0.5 | 1.20 | 0.0 | 0.3 | 12.00 | 9246627 |
| 9350481 | 1 | 01/01/2018 01:00:40 AM | 01/01/2018 01:10:07 AM | 1 | 0.70 | 1 | N | 234 | 164 | 2 | 7.5 | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 8.80 | 9350481 |
| 19061414 | 2 | 01/01/2018 01:00:44 AM | 01/01/2018 01:08:32 AM | 1 | 0.84 | 1 | N | 170 | 161 | 1 | 6.5 | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 7.80 | 19061414 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 142663 | 1 | 12/31/2018 12:56:20 PM | 12/31/2018 01:15:24 PM | 1 | 1.80 | 1 | N | 43 | 161 | 1 | 13.5 | 0.0 | 0.5 | 2.85 | 0.0 | 0.3 | 17.15 | 142663 |
| 142435 | 2 | 12/31/2018 12:57:32 PM | 12/31/2018 01:03:57 PM | 1 | 1.22 | 1 | N | 233 | 224 | 1 | 6.5 | 0.0 | 0.5 | 1.46 | 0.0 | 0.3 | 8.76 | 142435 |
| 142411 | 2 | 12/31/2018 12:57:41 PM | 12/31/2018 01:03:23 PM | 2 | 0.83 | 1 | N | 229 | 161 | 1 | 5.5 | 0.0 | 0.5 | 1.89 | 0.0 | 0.3 | 8.19 | 142411 |
| 142202 | 2 | 12/31/2018 12:58:56 PM | 12/31/2018 01:10:48 PM | 1 | 0.91 | 1 | N | 142 | 143 | 1 | 9.0 | 0.0 | 0.5 | 2.45 | 0.0 | 0.3 | 12.25 | 142202 |
| 202149 | 2 | 12/31/2018 12:59:06 AM | 12/31/2018 01:03:49 AM | 1 | 0.66 | 1 | N | 142 | 163 | 2 | 5.0 | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 6.30 | 202149 |
600000 rows × 18 columns
nyc_gdf = gpd.read_file('NYC_Taxi_Zones.geojson')
nyc_gdf.set_index('objectid')
nyc_gdf
| shape_area | objectid | shape_leng | location_id | zone | borough | geometry | |
|---|---|---|---|---|---|---|---|
| 0 | 0.0007823067885 | 1 | 0.116357453189 | 1 | Newark Airport | EWR | MULTIPOLYGON (((-74.18445 40.69500, -74.18449 ... |
| 1 | 0.00486634037837 | 2 | 0.43346966679 | 2 | Jamaica Bay | Queens | MULTIPOLYGON (((-73.82338 40.63899, -73.82277 ... |
| 2 | 0.000314414156821 | 3 | 0.0843411059012 | 3 | Allerton/Pelham Gardens | Bronx | MULTIPOLYGON (((-73.84793 40.87134, -73.84725 ... |
| 3 | 0.000111871946192 | 4 | 0.0435665270921 | 4 | Alphabet City | Manhattan | MULTIPOLYGON (((-73.97177 40.72582, -73.97179 ... |
| 4 | 0.000497957489363 | 5 | 0.0921464898574 | 5 | Arden Heights | Staten Island | MULTIPOLYGON (((-74.17422 40.56257, -74.17349 ... |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 258 | 0.000168611097013 | 256 | 0.0679149669603 | 256 | Williamsburg (South Side) | Brooklyn | MULTIPOLYGON (((-73.95834 40.71331, -73.95681 ... |
| 259 | 0.000394552487366 | 259 | 0.126750305191 | 259 | Woodlawn/Wakefield | Bronx | MULTIPOLYGON (((-73.85107 40.91037, -73.85207 ... |
| 260 | 0.000422345326907 | 260 | 0.133514154636 | 260 | Woodside | Queens | MULTIPOLYGON (((-73.90175 40.76078, -73.90147 ... |
| 261 | 0.0000343423231652 | 261 | 0.0271204563616 | 261 | World Trade Center | Manhattan | MULTIPOLYGON (((-74.01333 40.70503, -74.01327 ... |
| 262 | 0.000122330270966 | 262 | 0.0490636231541 | 262 | Yorkville East | Manhattan | MULTIPOLYGON (((-73.94383 40.78286, -73.94376 ... |
263 rows × 7 columns
nyc_gdf.explore()
neighbor_dict = {}
for index, zone in nyc_gdf.iterrows(): # pls don't be mad
# get 'not disjoint' zones
neighbors = nyc_gdf[~nyc_gdf.geometry.disjoint(zone.geometry)].objectid.tolist()
# remove own zone number from the list
neighbors = [int(num) for num in neighbors if zone.objectid != num]
#add zone neighbors to neighbor dictionary
neighbor_dict[index + 1] = neighbors
neighbor_dict
{1: [],
2: [30, 132],
3: [32, 51, 81, 184, 185, 242, 254],
4: [79, 148, 224, 232],
5: [84, 99, 204],
6: [115, 118, 214, 221],
7: [146, 179, 193, 207, 226, 223, 260],
8: [179, 223],
9: [16, 73, 98, 121, 171, 192],
10: [41, 43, 151, 166],
11: [205, 215, 216, 218, 219],
12: [14, 22, 67],
13: [13, 88, 261],
14: [12, 231, 261],
15: [20, 94, 136, 174, 241],
16: [33, 40, 52, 65, 97, 106, 181],
17: [11, 67, 227, 228],
18: [16, 171, 252],
19: [11, 21, 26, 67],
20: [99, 118, 156, 187, 251],
21: [9, 15, 64, 98, 171, 175],
22: [37, 49, 61, 80, 217, 225],
23: [64, 101, 175, 191],
24: [18, 31, 47, 78, 94, 174],
25: [22, 26, 108, 123, 178],
26: [22, 21, 67, 89, 111, 133, 178, 227],
27: [201],
28: [93, 121, 134, 130, 131, 135, 197],
29: [25, 52, 54, 65, 66],
30: [55, 108, 150, 123, 210],
31: [20, 32, 78, 174, 185, 240, 242, 254],
32: [3, 31, 185, 254],
33: [2],
34: [49, 66, 97, 217, 256],
35: [61, 72, 76, 77, 177],
36: [37, 80, 96, 198],
37: [17, 36, 63, 80, 96, 177, 225],
38: [139, 191, 203, 205],
39: [72, 76, 91, 155, 222],
40: [25, 52, 54, 106, 195, 228],
41: [24, 42, 43, 74, 75, 152, 166],
42: [144, 148, 209, 231, 232],
43: [],
44: [41, 74, 116, 120, 152, 166],
45: [24, 41, 75, 151, 142, 163, 239, 236, 237, 238],
46: [84, 99, 204],
47: [20, 59, 69, 78, 94, 167, 169],
48: [50, 68, 100, 142, 143, 163, 230, 246],
49: [17, 34, 61, 97, 189, 217],
50: [48, 142, 143, 246],
51: [3, 81, 184],
52: [25, 33, 40, 54],
53: [92, 138, 252],
54: [33, 40, 52, 195],
55: [29, 108],
56: [57, 82, 93, 95, 173, 196],
57: [56, 93, 173],
58: [184, 208],
59: [47, 60, 78, 167],
60: [59, 78, 126, 167, 212, 213, 242, 248],
61: [17, 35, 49, 62, 72, 177, 189, 225],
62: [61, 72, 189, 190, 188],
63: [37, 76, 96, 124, 177, 180, 258],
64: [16, 19, 101, 175],
65: [25, 33, 66, 97],
66: [35, 39, 61, 62, 71, 76, 91, 188],
67: [33, 34, 65, 97],
68: [11, 14, 22, 26, 227],
69: [48, 90, 100, 158, 186, 246, 249],
70: [9, 92, 171, 192],
71: [47, 159, 167, 168, 169, 247],
72: [93, 129, 138, 173],
73: [72, 85, 89, 91, 188],
74: [4, 107, 113, 114, 148, 224, 232, 234],
75: [17, 36, 37, 112, 157, 198, 217, 255, 256],
76: [41, 42, 75, 194],
77: [71, 89, 188],
78: [117],
79: [41, 43, 74, 236, 263, 262],
80: [35, 39, 63, 72, 77, 124, 177, 222],
81: [35, 76, 177],
82: [20, 31, 47, 59, 60, 242],
83: [3, 51, 184, 254, 259],
84: [88, 209, 261],
85: [56, 83, 129, 157, 160, 173, 196],
86: [82, 129, 157, 260],
87: [5, 44, 99, 109, 118, 204],
88: [12, 87, 261],
89: [26, 71, 85, 91, 133, 165, 178, 190, 188, 257],
90: [68, 113, 186, 234, 249],
91: [114, 158, 211, 231, 249],
92: [39, 72, 71, 89, 149, 155, 165],
93: [53, 73, 93, 171, 192, 253, 252],
94: [28, 56, 57, 70, 92, 95, 134, 135, 138, 173, 192, 253],
95: [18, 20, 47, 136, 169, 235],
96: [56, 93, 96, 102, 134, 196],
97: [36, 37, 63, 95, 102, 134, 197, 198, 258],
98: [25, 34, 49, 65, 66, 181, 189],
99: [9, 16, 121, 131, 175, 191, 192],
100: [5, 23, 44, 84, 109, 118, 204],
101: [48, 68, 161, 164, 186, 230],
102: [19, 64],
103: [95, 96, 160, 196, 198],
104: [],
105: [],
106: [],
107: [25, 40, 181, 195, 228],
108: [79, 113, 137, 164, 170, 224, 234],
109: [21, 29, 55, 123],
110: [84, 99, 110, 118, 176],
111: [109, 176],
112: [26, 133, 227, 228, 257],
113: [80, 255],
114: [79, 90, 107, 114, 234, 249],
115: [79, 125, 113, 144, 211, 249],
116: [6, 118, 221, 245, 251],
117: [42, 120, 152, 244],
118: [86, 201],
119: [6, 23, 84, 99, 109, 115, 172, 176, 214, 251],
120: [235, 247],
121: [42, 116, 127, 243, 244],
122: [91, 123, 154, 155, 165, 210],
123: [29, 154, 210],
124: [9, 28, 98, 131, 135, 192],
125: [130, 131, 191, 205],
126: [21, 29, 108, 149, 165, 178, 210],
127: [63, 76, 132, 180, 216],
128: [60, 147, 167, 168, 212, 213],
129: [120, 128, 243],
130: [127, 220, 243],
131: [70, 82, 83, 138, 173, 207, 223, 260],
132: [28, 93, 95, 96, 130, 197],
133: [24, 43, 238],
134: [28, 122, 134, 131, 197, 205, 215, 216],
135: [38, 203, 205, 218, 219],
136: [28, 98, 121, 122, 130, 175, 191],
137: [2, 124, 203, 216, 219],
138: [26, 89, 111, 257],
139: [141, 229, 263, 262],
140: [28, 93, 121, 192],
141: [18, 94, 153, 220, 235, 241],
142: [107, 170, 224, 233],
143: [140, 162, 229, 236, 237, 263, 262],
144: [43, 48, 50, 143, 163, 239],
145: [41, 42, 116, 166],
146: [53, 70, 93, 129, 199, 223],
147: [48, 50, 142, 239],
148: [45, 114, 148, 211, 231],
149: [146, 193, 226],
150: [7, 145, 193, 226],
151: [126, 159, 167, 168],
152: [4, 45, 79, 144, 232],
153: [136, 220],
154: [149, 150, 155, 210],
155: [39, 91, 149, 154],
156: [23, 187],
157: [69, 147, 167, 168],
158: [80, 82, 83, 160, 198, 226, 260],
159: [68, 125, 246, 249],
160: [82, 102, 157, 196, 198],
161: [100, 162, 163, 164, 170, 230],
162: [141, 161, 163, 170, 229, 233, 237],
163: [43, 48, 142, 161, 162, 230, 237],
164: [100, 107, 161, 170, 186, 230, 234],
165: [107, 137, 161, 162, 164, 233, 234],
166: [89, 91, 149, 123, 178],
167: [24, 41, 42, 152],
168: [47, 59, 60, 69, 126, 147, 159],
169: [21, 26, 89, 123, 165],
170: [69, 126, 147, 159, 247],
171: [47, 69, 94, 235, 247],
172: [9, 15, 16, 73, 92, 252],
173: [118, 176, 214],
174: [56, 57, 70, 82, 93, 129],
175: [18, 20, 31, 240, 241],
176: [16, 19, 64, 98, 131, 191],
177: [7, 8, 193, 223],
178: [109, 110, 118, 172],
179: [35, 37, 61, 63, 76, 77, 225],
180: [63, 124, 197, 216, 258],
181: [25, 97, 106, 189, 190, 228, 257],
182: [212, 242, 248, 250],
183: [184, 208, 242],
184: [3, 51, 58, 81, 183, 208, 242],
185: [3, 31, 32, 242],
186: [68, 90, 100, 164, 234],
187: [49, 61, 62, 97, 181, 190],
188: [62, 89, 181, 189, 188, 257],
189: [23, 156, 206, 251],
190: [62, 72, 71, 85, 89, 190],
191: [19, 38, 98, 122, 131, 175, 205],
192: [9, 73, 92, 93, 98, 121, 135],
193: [7, 145, 146, 179],
194: [74],
195: [56, 82, 95, 102, 160],
196: [40, 54, 106, 228],
197: [38, 139, 132, 219],
198: [28, 96, 134, 130, 180, 215, 216, 258],
199: [36, 80, 96, 102, 157, 160],
200: [138],
201: [60, 126, 182, 213, 248, 250],
202: [60, 126, 212, 208, 250],
203: [220, 240],
204: [5, 44, 84, 99],
205: [27, 117],
206: [],
207: [10, 38, 122, 130, 139, 191, 215, 218],
208: [187, 221, 245, 251],
209: [7, 129, 223, 260],
210: [58, 183, 184, 213, 242, 250],
211: [45, 87, 231, 261],
212: [29, 149, 150, 123, 154],
213: [125, 114, 144, 231],
214: [6, 118, 172],
215: [10, 130, 197, 205, 216],
216: [10, 124, 130, 132, 180, 197, 215, 219],
217: [17, 34, 49, 80, 256],
218: [10, 139, 205, 219],
219: [10, 139, 132, 203, 216, 218],
220: [17, 37, 61, 177],
221: [128, 136, 153, 200, 240, 241],
222: [4, 79, 107, 137],
223: [7, 145, 146, 157, 260],
224: [6, 115, 206, 245],
225: [39, 76],
226: [7, 8, 129, 138, 179, 207],
227: [14, 26, 67, 111, 228],
228: [14, 40, 106, 111, 181, 195, 227, 257],
229: [140, 141, 162, 233, 237],
230: [48, 100, 161, 163, 164],
231: [13, 45, 125, 144, 209, 211, 261],
232: [43, 142, 143, 238],
233: [4, 45, 79, 148],
234: [137, 162, 170, 229],
235: [79, 90, 107, 113, 164, 170, 186, 249],
236: [94, 119, 136, 169, 247],
237: [43, 75, 141, 237, 263],
238: [43, 141, 162, 163, 229, 236],
239: [43, 151, 239],
240: [31, 174, 200, 220, 241, 254, 259],
241: [18, 136, 174, 220, 240],
242: [63, 96, 180, 197],
243: [3, 31, 60, 78, 182, 183, 184, 185, 208, 248, 250],
244: [115, 206, 221, 251],
245: [75, 140, 141, 236, 262],
246: [120, 127, 128, 244],
247: [116, 120, 243],
248: [92, 93],
249: [48, 50, 68, 158],
250: [69, 119, 168, 169, 235],
251: [60, 182, 212, 242],
252: [68, 90, 125, 113, 114, 158, 234],
253: [89, 111, 133, 181, 190, 228],
254: [182, 212, 213, 208, 242],
255: [23, 115, 118, 187, 206, 245],
256: [15, 53, 92, 171],
257: [3, 31, 32, 81, 240, 259],
258: [80, 112, 256],
259: [34, 80, 217, 255],
260: [81, 240, 254],
261: [7, 83, 129, 157, 207, 226],
262: [12, 13, 87, 88, 209, 231],
263: [75, 140, 141, 263]}
trips_zones_dict = {}
df = combined
# keep only trips that have PO and DO zones that aren't above 263
limit_mask = (df['PULocationID'] < 264) & (df['DOLocationID'] < 264)
# Find zones with no neighbors
dont_include = []
for x in neighbor_dict:
if neighbor_dict[x] == []:
dont_include.append(x)
# apply masks
trips_df = df[limit_mask]
# create mask to remove trips that start or end in a zone without neighbors
no_neighbors_mask = (~trips_df['PULocationID'].isin(dont_include)) & (~trips_df['DOLocationID'].isin(dont_include))
trips_df = trips_df[no_neighbors_mask]
column_names = ['tpep_pickup_datetime', 'tpep_dropoff_datetime']
for column in column_names:
trips_df[column] = pd.to_datetime(trips_df[column],format = '%m/%d/%Y %I:%M:%S %p')
def add_time_and_speed(df: pd.DataFrame) -> pd.DataFrame:
"""
Calculates the trip time and average speed of a taxi trip and creates two columns in the taxi dataframe
to store the data
:param df: taxi pandas dataframe
:return: The taxi dataframe with trip_time_h and avg speed columns
"""
df['trip_time'] = (df['tpep_dropoff_datetime'] - df['tpep_pickup_datetime'])
df['trip_time_h'] = [(x.total_seconds()) / 3600 for x in df['trip_time']]
df['avg speed'] = df['trip_distance'] / df['trip_time_h']
return df
add_time_and_speed(trips_df)
| VendorID | tpep_pickup_datetime | tpep_dropoff_datetime | passenger_count | trip_distance | RatecodeID | store_and_fwd_flag | PULocationID | DOLocationID | payment_type | ... | extra | mta_tax | tip_amount | tolls_amount | improvement_surcharge | total_amount | tripID | trip_time | trip_time_h | avg speed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19061600 | 1 | 2018-01-01 01:00:10 | 2018-01-01 01:21:27 | 1 | 5.90 | 1 | N | 140 | 223 | 1 | ... | 0.5 | 0.5 | 4.35 | 0.0 | 0.3 | 26.15 | 19061600 | 0 days 00:21:17 | 0.354722 | 16.632733 |
| 19061552 | 2 | 2018-01-01 01:00:20 | 2018-01-01 01:08:12 | 1 | 2.52 | 1 | N | 233 | 263 | 2 | ... | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 10.30 | 19061552 | 0 days 00:07:52 | 0.131111 | 19.220339 |
| 9246627 | 2 | 2018-01-01 13:00:20 | 2018-01-01 13:11:21 | 1 | 2.15 | 1 | N | 13 | 79 | 1 | ... | 0.0 | 0.5 | 1.20 | 0.0 | 0.3 | 12.00 | 9246627 | 0 days 00:11:01 | 0.183611 | 11.709531 |
| 9350481 | 1 | 2018-01-01 01:00:40 | 2018-01-01 01:10:07 | 1 | 0.70 | 1 | N | 234 | 164 | 2 | ... | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 8.80 | 9350481 | 0 days 00:09:27 | 0.157500 | 4.444444 |
| 19061414 | 2 | 2018-01-01 01:00:44 | 2018-01-01 01:08:32 | 1 | 0.84 | 1 | N | 170 | 161 | 1 | ... | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 7.80 | 19061414 | 0 days 00:07:48 | 0.130000 | 6.461538 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 202378 | 1 | 2018-12-31 00:56:01 | 2018-12-31 01:05:01 | 2 | 1.00 | 1 | N | 148 | 144 | 1 | ... | 0.5 | 0.5 | 2.60 | 0.0 | 0.3 | 11.40 | 202378 | 0 days 00:09:00 | 0.150000 | 6.666667 |
| 142435 | 2 | 2018-12-31 12:57:32 | 2018-12-31 13:03:57 | 1 | 1.22 | 1 | N | 233 | 224 | 1 | ... | 0.0 | 0.5 | 1.46 | 0.0 | 0.3 | 8.76 | 142435 | 0 days 00:06:25 | 0.106944 | 11.407792 |
| 142411 | 2 | 2018-12-31 12:57:41 | 2018-12-31 13:03:23 | 2 | 0.83 | 1 | N | 229 | 161 | 1 | ... | 0.0 | 0.5 | 1.89 | 0.0 | 0.3 | 8.19 | 142411 | 0 days 00:05:42 | 0.095000 | 8.736842 |
| 142202 | 2 | 2018-12-31 12:58:56 | 2018-12-31 13:10:48 | 1 | 0.91 | 1 | N | 142 | 143 | 1 | ... | 0.0 | 0.5 | 2.45 | 0.0 | 0.3 | 12.25 | 142202 | 0 days 00:11:52 | 0.197778 | 4.601124 |
| 202149 | 2 | 2018-12-31 00:59:06 | 2018-12-31 01:03:49 | 1 | 0.66 | 1 | N | 142 | 163 | 2 | ... | 0.5 | 0.5 | 0.00 | 0.0 | 0.3 | 6.30 | 202149 | 0 days 00:04:43 | 0.078611 | 8.395760 |
572227 rows × 21 columns
zone_slide = ipywidgets.IntSlider(value=88, min=2, max=263)
@ipywidgets.interact(zone = zone_slide)
def plot_zone_speeds(zone:int):
sub1 = trips_df[trips_df['PULocationID'] == zone]
sub2 = trips_df[trips_df['DOLocationID'] == zone]
sub = pd.concat([sub1,sub2])
# print((sub))
plt.hist(sub['avg speed'], bins = 60)
plt.xlabel('Average Speed (mph)')
plt.ylabel('Number of trips')
return plt.show()
interactive(children=(IntSlider(value=88, description='zone', max=263, min=2), Output()), _dom_classes=('widge…
def plot_routes_for_random_addresses_in_2_zones(gdf:gpd.GeoDataFrame, zone1: int, zone2: int):
gdf.geometry.to_crs(4326)
zone1_bbox = gdf.loc[gdf['objectid'] == str(zone1)]['geometry'].item()
g_z1 = ox.graph_from_polygon(zone1_bbox,network_type='drive',retain_all=True)
g_z1 = ox.utils_graph.get_undirected(g_z1)
random_address_z1 = ox.utils_geo.sample_points(g_z1,1).to_crs(4326)
raz1_list = random_address_z1.to_list()
raz1_lat = float((str(raz1_list).split(' '))[1].strip('('))
raz1_lon = float((str(raz1_list).split(' '))[2].strip(')>]'))
zone2_bbox = gdf.loc[gdf['objectid'] == str(zone2)]['geometry'].item()
g_z2 = ox.graph_from_polygon(zone2_bbox)
g_z2 = ox.utils_graph.get_undirected(g_z2)
random_address_z2 = ox.utils_geo.sample_points(g_z2, 1).to_crs(4326)
raz2_list = random_address_z2.to_list()
raz2_lat = float((str(raz2_list).split(' '))[1].strip('('))
raz2_lon = float((str(raz2_list).split(' '))[2].strip(')>]'))
g = ox.graph_from_place('New York City, NY, USA', network_type = 'drive')
random_node_z1 = ox.nearest_nodes(g,raz1_lat,raz1_lon)
random_node_z2 = ox.nearest_nodes(g,raz2_lat,raz2_lon)
route = ox.shortest_path(g, random_node_z1, random_node_z2, weight='length')
G = nx.compose(g_z1, g_z2).to_undirected()
c = ox.graph_to_gdfs(G, edges=False).unary_union.centroid
bbox = ox.utils_geo.bbox_from_point(point=(c.y, c.x), dist=5000)
fig, ax = ox.plot_graph_route(g, route, 'g', show=False, close=False, node_size=1, node_color='w', edge_color='w', edge_linewidth=0.05,bbox = bbox)
random_address_z1.plot(color = 'r', ax = ax)
random_address_z2.plot(color = 'b', ax = ax)
return
plot_routes_for_random_addresses_in_2_zones(nyc_gdf, 132, 219)
/Users/tzacks2/.conda/envs/DataViz/lib/python3.10/site-packages/osmnx/utils_geo.py:50: UserWarning: Geometry is in a geographic CRS. Results from 'interpolate' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. return lines.interpolate(np.random.rand(n), normalized=True) /Users/tzacks2/.conda/envs/DataViz/lib/python3.10/site-packages/osmnx/utils_geo.py:50: UserWarning: Geometry is in a geographic CRS. Results from 'interpolate' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. return lines.interpolate(np.random.rand(n), normalized=True)
street_closures = pd.read_csv('2018_street_closures.csv')
g = ox.graph_from_place('NYC, NY, USA', network_type='drive')
g_gdf = ox.graph_to_gdfs(g,nodes=False, edges=True, node_geometry=False, fill_edge_geometry=False)
nyc = g_gdf
nyc = nyc[~nyc['name'].apply(lambda x: isinstance(x, list))]
# create sample dataframe
# define function to remove suffix from street name
def remove_suffix(name):
# check if value is a string
if isinstance(name, str):
# apply regular expression to remove suffix
return re.sub(r'(?<=\d)(st|nd|rd|th)\b', '', name)
# if value is not a string or is NaN, return NaN
else:
return np.nan
# apply function to 'name' column and overwrite existing values
nyc['name'] = nyc['name'].apply(remove_suffix)
/Users/tzacks2/.conda/envs/DataViz/lib/python3.10/site-packages/geopandas/geodataframe.py:1443: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value)
nyc[['name','geometry']].dropna()
| name | geometry | |||
|---|---|---|---|---|
| u | v | key | ||
| 39076461 | 274283981 | 0 | Cross Island Parkway | LINESTRING (-73.79463 40.78641, -73.79309 40.7... |
| 39076490 | 277672005 | 0 | Cross Island Parkway | LINESTRING (-73.75709 40.76243, -73.75741 40.7... |
| 39076504 | 462122450 | 0 | Cross Island Parkway | LINESTRING (-73.74416 40.75347, -73.74552 40.7... |
| 42421728 | 42432736 | 0 | Central Park West | LINESTRING (-73.96004 40.79805, -73.95996 40.7... |
| 42435337 | 0 | Central Park West | LINESTRING (-73.96004 40.79805, -73.96011 40.7... | |
| ... | ... | ... | ... | ... |
| 10840270527 | 42842442 | 0 | 25 Avenue | LINESTRING (-73.81729 40.77633, -73.81721 40.7... |
| 10840270528 | 42808526 | 0 | Willets Point Boulevard | LINESTRING (-73.81729 40.77648, -73.81752 40.7... |
| 10853807645 | 4616536348 | 0 | Broome Street | LINESTRING (-74.00692 40.72449, -74.00616 40.7... |
| 10858288590 | 42437678 | 0 | West 34 Street | LINESTRING (-74.00503 40.75707, -74.00491 40.7... |
| 246650510 | 0 | 12 Avenue | LINESTRING (-74.00503 40.75707, -74.00512 40.7... |
91754 rows × 2 columns
ax = nyc.geometry.plot(linewidth = 0.05)
nyc_gdf.plot(ax = ax,cmap = 'Pastel1',linewidth = .2)
sec_ave = nyc.loc[nyc['name'] == '2 Avenue'].geometry
sec_ave.plot(color = 'r', ax = ax, linewidth = 2.5,linestyle = '--')
ax.set_title('2nd Avenue Outlined on Zone Map with Street View')
u v key
42423020 42434965 0 LINESTRING (-73.97497 40.74609, -73.97502 40.7...
42424089 42442961 0 LINESTRING (-73.96436 40.76062, -73.96440 40.7...
42429340 42443037 0 LINESTRING (-73.95174 40.77788, -73.95181 40.7...
42429693 42443000 0 LINESTRING (-73.95934 40.76750, -73.95938 40.7...
42429900 42442891 0 LINESTRING (-73.97953 40.73979, -73.97957 40.7...
...
7925143983 3931534871 0 LINESTRING (-73.93434 40.80137, -73.93439 40.8...
8132470015 42496182 0 LINESTRING (-73.99439 40.67140, -73.99484 40.6...
42496175 0 LINESTRING (-73.99439 40.67140, -73.99393 40.6...
9924890061 42496120 0 None
42496133 0 LINESTRING (-74.00644 40.65918, -74.00617 40.6...
Name: geometry, Length: 245, dtype: geometry
Text(0.5, 1.0, '2nd Avenue Outlined on Zone Map with Street View')
def open_file(path: str) -> pd.DataFrame:
return pd.read_csv(path)
def format_index(df: pd.DataFrame, new_index_name: str) -> pd.DataFrame:
df.rename(columns={'Unnamed: 0': new_index_name}, inplace=True)
df.set_index(new_index_name)
return df
def keep_relevant_columns(df: pd.DataFrame, column_names: list) -> pd.DataFrame:
df = df[column_names]
return df
def datetime_conversions(df: pd.DataFrame, column_names: list, time_format: str) -> pd.DataFrame:
for column in column_names:
df[column] = pd.to_datetime(df[column], format=time_format)
return df
crashes_data = format_index(open_file("2018_crashes.csv"), 'index')
crashes_data = crashes_data.loc[(crashes_data['LATITUDE'] >= 40) & (crashes_data['LATITUDE'] <= 41) & (
crashes_data['LONGITUDE'] >= -74.5) & (crashes_data['LONGITUDE'] <= -73)]
crashes_data = keep_relevant_columns(crashes_data, ['index', 'CRASH DATE_CRASH TIME', 'LOCATION'])
crashes_data = datetime_conversions(crashes_data, ['CRASH DATE_CRASH TIME'], '%Y-%m-%d %H:%M:%S')
crashes_data = crashes_data.dropna()
print(crashes_data.head(10))
index CRASH DATE_CRASH TIME LOCATION 0 0 2018-01-01 04:16:00 (40.8018, -73.96108) 1 1 2018-01-01 20:30:00 (40.743973, -73.8851) 2 2 2018-01-01 23:41:00 (40.72143, -73.892746) 3 3 2018-01-01 15:30:00 (40.80174, -73.96477) 4 4 2018-01-01 15:00:00 (40.666225, -73.80086) 5 5 2018-01-01 12:10:00 (40.763073, -73.816345) 6 6 2018-01-01 18:35:00 (40.820305, -73.89083) 7 7 2018-01-01 13:50:00 (40.65892, -73.889824) 8 8 2018-01-01 01:37:00 (40.662277, -73.91078) 9 9 2018-01-01 11:50:00 (40.5744, -74.099304)
street_closures = open_file("2018_street_closures.csv")
street_geometries = open_file("street_geometries.csv")
street_closures['ONSTREETNAME'] = street_closures['ONSTREETNAME'].str.lower()
street_closures['ONSTREETNAME'] = street_closures['ONSTREETNAME'].str.replace('\s+', ' ', regex=True)
street_geometries['name'] = street_geometries['name'].str.lower()
merged_streets_df = pd.merge(street_closures, street_geometries, left_on='ONSTREETNAME', right_on='name')
merged_streets_df = keep_relevant_columns(merged_streets_df, ['ONSTREETNAME', 'WORK_START_DATE', 'WORK_END_DATE',
'geometry'])
merged_streets_df = merged_streets_df.dropna()
merged_streets_df = datetime_conversions(merged_streets_df, ['WORK_START_DATE', 'WORK_END_DATE'],
'%Y-%m-%d %H:%M:%S')
print(merged_streets_df.head(10))
ONSTREETNAME WORK_START_DATE WORK_END_DATE \
0 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
1 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
2 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
3 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
4 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
5 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
6 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
7 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
8 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
9 douglas road 2018-01-22 10:54:36 2018-02-20 23:58:59
geometry
0 LINESTRING (-73.746055 40.777191, -73.7460243 ...
1 LINESTRING (-73.746055 40.777191, -73.7460867 ...
2 LINESTRING (-73.7455875 40.7734982, -73.745322...
3 LINESTRING (-73.745251 40.774248, -73.7452656 ...
4 LINESTRING (-73.746232 40.777863, -73.7460867 ...
5 LINESTRING (-73.745916 40.77652, -73.7460243 4...
6 LINESTRING (-73.745642 40.775624, -73.7454011 ...
7 LINESTRING (-73.745266 40.774783, -73.7451997 ...
8 LINESTRING (-74.095379 40.607137, -74.095526 4...
9 LINESTRING (-74.095379 40.607137, -74.095277 4...
def convert_to_geometry_point(coords):
if coords.startswith('LINESTRING'):
coords = coords.replace("LINESTRING (", "").replace(")", "")
coords = [tuple(map(float, c.split())) for c in coords.split(",")]
multipoint = [(float(lon), float(lat)) for lon, lat in coords]
geo_point = LineString(multipoint)
elif type(coords) == str:
lat, lon = coords.strip("()").split(',')
geo_point = Point(float(lon), float(lat))
else:
raise TypeError('Invalid location type. Must be string or list of coordinates.')
return geo_point
def add_zone_to_event(df, nyc_geo, col_name):
crs = 'EPSG:4326'
df['ZONE'] = [list() for x in range(len(df.index))]
taxi_zones = nyc_geo.zone
taxi_zones_boundaries = nyc_geo.geometry
df['geometry'] = df[col_name].apply(lambda x: convert_to_geometry_point(x))
data_gdf = gpd.GeoDataFrame(df, geometry='geometry', crs=crs)
for i, row in data_gdf.iterrows():
coords = row['geometry']
for boundary, zone in zip(taxi_zones_boundaries, taxi_zones):
if boundary.contains(coords):
data_gdf.at[i, 'ZONE'].append(zone)
break
return data_gdf
# load the crashes and zones data
crashes_data_gdf = gpd.read_file('crashes.geojson')
# plot the zones
fig, ax = plt.subplots(figsize=(50, 50))
taxi_zones_boundaries = nyc_gdf.geometry.boundary
taxi_zones_boundaries.plot(ax=ax, alpha=0.5, markersize=2, edgecolor='blue')
# plot the crashes
crashes_data_gdf.plot(ax=ax, markersize=10, color='red', marker='o')
# set the title and show the plot
ax.set_title('Crashes and Taxi Zones in NYC')
Text(0.5, 1.0, 'Crashes and Taxi Zones in NYC')